Visualizing harmful PM2.5 levels in the US by county
In [1]:
Copied!
# %pip install pandas geopandas folium matplotlib mapclassify
# %pip install pandas geopandas folium matplotlib mapclassify
In [2]:
Copied!
import pandas as pd
import geopandas as gpd
import pandas as pd
import geopandas as gpd
In [3]:
Copied!
# Input: the CSV export of the US NIH HDPulse air-pollution table, downloaded from
# https://hdpulse.nimhd.nih.gov/data-portal/physical/table?age=001&age_options=ageall_1&demo=234&demo_options=air_pollution_1&physicaltopic=002&physicaltopic_options=physical_2&race=00&race_options=raceall_1&sex=0&sex_options=sexboth_1&statefips=99&statefips_options=area_states
# The first 5 lines of the file are skipped so that the line after them is parsed
# as the column header.
county_pm25: pd.DataFrame = pd.read_csv("HDPulse_data_export.csv", skiprows=5)
# Input: the CSV export of the US NIH HDPulse air-pollution table, downloaded from
# https://hdpulse.nimhd.nih.gov/data-portal/physical/table?age=001&age_options=ageall_1&demo=234&demo_options=air_pollution_1&physicaltopic=002&physicaltopic_options=physical_2&race=00&race_options=raceall_1&sex=0&sex_options=sexboth_1&statefips=99&statefips_options=area_states
# The first 5 lines of the file are skipped so that the line after them is parsed
# as the column header.
county_pm25: pd.DataFrame = pd.read_csv("HDPulse_data_export.csv", skiprows=5)
In [4]:
Copied!
# inspect the raw export as loaded (column names, dtypes of FIPS / PM2.5, stray rows)
county_pm25
# inspect the raw export as loaded (column names, dtypes of FIPS / PM2.5, stray rows)
county_pm25
Out[4]:
| | County | FIPS | Micrograms per cubic meter (PM2.5)(2) |
|---|---|---|---|
| 0 | United States | 0.0 | 7.4 |
| 1 | San Bernardino County, California | 6071.0 | 15.6 |
| 2 | Fairbanks North Star, Alaska | 2090.0 | 15.5 |
| 3 | Allegheny County, Pennsylvania | 42003.0 | 14.1 |
| 4 | San Diego County, California | 6073.0 | 13.8 |
| ... | ... | ... | ... |
| 3146 | Notes: | NaN | NaN |
| 3147 | Source: National Environmental Public Health T... | NaN | NaN |
| 3148 | Average daily density of fine particulate matt... | NaN | NaN |
| 3149 | Some data are not available or suppressed due ... | NaN | NaN |
| 3150 | Note: This website still uses Connecticut coun... | NaN | NaN |
3151 rows × 3 columns
In [5]:
Copied!
# Clean the raw export into one row per area with a usable join key and reading.
county_pm25_processed: pd.DataFrame = (
    county_pm25.assign(
        # parse the last column of the export (the PM2.5 reading) as a number;
        # anything that cannot be parsed becomes NaN
        pm25_ug_per_m3=lambda df: pd.to_numeric(arg=df[df.columns[-1]], errors="coerce"),
        # make sure FIPS is numeric, so that rows without a code carry NaN
        FIPS=lambda df: pd.to_numeric(df["FIPS"]),
    )
    # keep only rows that have both a FIPS code and a PM2.5 reading
    # (this also removes the trailing notes rows of the export)
    .dropna(subset=["FIPS", "pm25_ug_per_m3"])
    # turn FIPS into a zero-padded, 5-character string
    .assign(FIPS=lambda df: df["FIPS"].astype(int).astype(str).str.zfill(5))
)
# Clean the raw export into one row per area with a usable join key and reading.
county_pm25_processed: pd.DataFrame = (
    county_pm25.assign(
        # parse the last column of the export (the PM2.5 reading) as a number;
        # anything that cannot be parsed becomes NaN
        pm25_ug_per_m3=lambda df: pd.to_numeric(arg=df[df.columns[-1]], errors="coerce"),
        # make sure FIPS is numeric, so that rows without a code carry NaN
        FIPS=lambda df: pd.to_numeric(df["FIPS"]),
    )
    # keep only rows that have both a FIPS code and a PM2.5 reading
    # (this also removes the trailing notes rows of the export)
    .dropna(subset=["FIPS", "pm25_ug_per_m3"])
    # turn FIPS into a zero-padded, 5-character string
    .assign(FIPS=lambda df: df["FIPS"].astype(int).astype(str).str.zfill(5))
)
In [6]:
Copied!
# optional sense check: FIPS should now be zero-padded 5-character strings
# and pm25_ug_per_m3 should hold the numeric readings
county_pm25_processed
# optional sense check: FIPS should now be zero-padded 5-character strings
# and pm25_ug_per_m3 should hold the numeric readings
county_pm25_processed
Out[6]:
| | County | FIPS | Micrograms per cubic meter (PM2.5)(2) | pm25_ug_per_m3 |
|---|---|---|---|---|
| 0 | United States | 00000 | 7.4 | 7.4 |
| 1 | San Bernardino County, California | 06071 | 15.6 | 15.6 |
| 2 | Fairbanks North Star, Alaska | 02090 | 15.5 | 15.5 |
| 3 | Allegheny County, Pennsylvania | 42003 | 14.1 | 14.1 |
| 4 | San Diego County, California | 06073 | 13.8 | 13.8 |
| ... | ... | ... | ... | ... |
| 3111 | Custer County, South Dakota | 46033 | 2.6 | 2.6 |
| 3112 | Apache County, Arizona | 04001 | 2.5 | 2.5 |
| 3113 | Campbell County, Wyoming | 56005 | 2.4 | 2.4 |
| 3114 | Converse County, Wyoming | 56009 | 2.2 | 2.2 |
| 3115 | Gallatin County, Montana | 30031 | 0.9 | 0.9 |
3116 rows × 4 columns
In [7]:
Copied!
# Input: the US county cartographic boundary files (2017, 1:500k), downloaded from
# https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.html
# NOTE(review): the path has no extension - presumably the unzipped shapefile folder; confirm.
counties: gpd.GeoDataFrame = gpd.read_file("cb_2017_us_county_500k")
# Input: the US county cartographic boundary files (2017, 1:500k), downloaded from
# https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.html
# NOTE(review): the path has no extension - presumably the unzipped shapefile folder; confirm.
counties: gpd.GeoDataFrame = gpd.read_file("cb_2017_us_county_500k")
In [8]:
Copied!
# Build the join key: the state code followed by the county code, as one string.
counties_processed: gpd.GeoDataFrame = counties.assign(FIPS=lambda df: df["STATEFP"] + df["COUNTYFP"])
# Build the join key: the state code followed by the county code, as one string.
counties_processed: gpd.GeoDataFrame = counties.assign(FIPS=lambda df: df["STATEFP"] + df["COUNTYFP"])
In [9]:
Copied!
# optional sense check: FIPS should read as STATEFP immediately followed by COUNTYFP
counties_processed
# optional sense check: FIPS should read as STATEFP immediately followed by COUNTYFP
counties_processed
Out[9]:
| | STATEFP | COUNTYFP | COUNTYNS | AFFGEOID | GEOID | NAME | LSAD | ALAND | AWATER | AgriRegion | geometry | FIPS |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 01 | 005 | 00161528 | 0500000US01005 | 01005 | Barbour | 06 | 2292144656 | 50538698 | EastUS | POLYGON ((-85.74803 31.61918, -85.74544 31.618... | 01005 |
| 1 | 01 | 023 | 00161537 | 0500000US01023 | 01023 | Choctaw | 06 | 2365869837 | 19144469 | EastUS | POLYGON ((-88.47323 31.89386, -88.46888 31.930... | 01023 |
| 2 | 01 | 035 | 00161543 | 0500000US01035 | 01035 | Conecuh | 06 | 2201948618 | 6643480 | EastUS | POLYGON ((-87.4272 31.26436, -87.42551 31.2683... | 01035 |
| 3 | 01 | 051 | 00161551 | 0500000US01051 | 01051 | Elmore | 06 | 1601762124 | 99965171 | EastUS | POLYGON ((-86.41333 32.75059, -86.37115 32.750... | 01051 |
| 4 | 01 | 065 | 00161558 | 0500000US01065 | 01065 | Hale | 06 | 1667907107 | 32423356 | EastUS | POLYGON ((-87.87046 32.76244, -87.86818 32.765... | 01065 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3228 | 37 | 069 | 01008553 | 0500000US37069 | 37069 | Franklin | 06 | 1273631713 | 7304032 | EastUS | POLYGON ((-78.54551 36.0567, -78.54493 36.0772... | 37069 |
| 3229 | 48 | 317 | 01383941 | 0500000US48317 | 48317 | Martin | 06 | 2369724595 | 1931832 | CentralRegion | POLYGON ((-102.21103 32.17704, -102.21111 32.3... | 48317 |
| 3230 | 54 | 107 | 01560558 | 0500000US54107 | 54107 | Wood | 06 | 948592039 | 27228519 | EastUS | POLYGON ((-81.75582 39.18052, -81.75575 39.180... | 54107 |
| 3231 | 13 | 269 | 00344156 | 0500000US13269 | 13269 | Taylor | 06 | 975612265 | 7802363 | EastUS | MULTIPOLYGON (((-84.05331 32.52202, -84.00849 ... | 13269 |
| 3232 | 01 | 003 | 00161527 | 0500000US01003 | 01003 | Baldwin | 06 | 4117605847 | 1133109409 | EastUS | POLYGON ((-88.02858 30.22676, -88.02399 30.230... | 01003 |
3233 rows × 12 columns
In [10]:
Copied!
# Attach the PM2.5 readings to the county geometries via the shared FIPS key.
# how="left" keeps every county shape; counties without a reading get NaN in the
# PM2.5 columns.
# validate="many_to_one" makes pandas raise a MergeError if a FIPS code occurs more
# than once in the PM2.5 table, instead of silently duplicating county rows.
counties_w_pm25: gpd.GeoDataFrame = counties_processed.merge(
    right=county_pm25_processed,
    on="FIPS",
    how="left",
    validate="many_to_one",
)
# Attach the PM2.5 readings to the county geometries via the shared FIPS key.
# how="left" keeps every county shape; counties without a reading get NaN in the
# PM2.5 columns.
# validate="many_to_one" makes pandas raise a MergeError if a FIPS code occurs more
# than once in the PM2.5 table, instead of silently duplicating county rows.
counties_w_pm25: gpd.GeoDataFrame = counties_processed.merge(
    right=county_pm25_processed,
    on="FIPS",
    how="left",
    validate="many_to_one",
)
In [11]:
Copied!
# optional sense check: every county row should now carry the PM2.5 columns
# (NaN where the left join found no reading for that FIPS)
counties_w_pm25
# optional sense check: every county row should now carry the PM2.5 columns
# (NaN where the left join found no reading for that FIPS)
counties_w_pm25
Out[11]:
| | STATEFP | COUNTYFP | COUNTYNS | AFFGEOID | GEOID | NAME | LSAD | ALAND | AWATER | AgriRegion | geometry | FIPS | County | Micrograms per cubic meter (PM2.5)(2) | pm25_ug_per_m3 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 01 | 005 | 00161528 | 0500000US01005 | 01005 | Barbour | 06 | 2292144656 | 50538698 | EastUS | POLYGON ((-85.74803 31.61918, -85.74544 31.618... | 01005 | Barbour County, Alabama | 9.4 | 9.4 |
| 1 | 01 | 023 | 00161537 | 0500000US01023 | 01023 | Choctaw | 06 | 2365869837 | 19144469 | EastUS | POLYGON ((-88.47323 31.89386, -88.46888 31.930... | 01023 | Choctaw County, Alabama | 9.3 | 9.3 |
| 2 | 01 | 035 | 00161543 | 0500000US01035 | 01035 | Conecuh | 06 | 2201948618 | 6643480 | EastUS | POLYGON ((-87.4272 31.26436, -87.42551 31.2683... | 01035 | Conecuh County, Alabama | 9.2 | 9.2 |
| 3 | 01 | 051 | 00161551 | 0500000US01051 | 01051 | Elmore | 06 | 1601762124 | 99965171 | EastUS | POLYGON ((-86.41333 32.75059, -86.37115 32.750... | 01051 | Elmore County, Alabama | 10.0 | 10.0 |
| 4 | 01 | 065 | 00161558 | 0500000US01065 | 01065 | Hale | 06 | 1667907107 | 32423356 | EastUS | POLYGON ((-87.87046 32.76244, -87.86818 32.765... | 01065 | Hale County, Alabama | 9.4 | 9.4 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3228 | 37 | 069 | 01008553 | 0500000US37069 | 37069 | Franklin | 06 | 1273631713 | 7304032 | EastUS | POLYGON ((-78.54551 36.0567, -78.54493 36.0772... | 37069 | Franklin County, North Carolina | 8.5 | 8.5 |
| 3229 | 48 | 317 | 01383941 | 0500000US48317 | 48317 | Martin | 06 | 2369724595 | 1931832 | CentralRegion | POLYGON ((-102.21103 32.17704, -102.21111 32.3... | 48317 | Martin County, Texas | 7.5 | 7.5 |
| 3230 | 54 | 107 | 01560558 | 0500000US54107 | 54107 | Wood | 06 | 948592039 | 27228519 | EastUS | POLYGON ((-81.75582 39.18052, -81.75575 39.180... | 54107 | Wood County, West Virginia | 7.8 | 7.8 |
| 3231 | 13 | 269 | 00344156 | 0500000US13269 | 13269 | Taylor | 06 | 975612265 | 7802363 | EastUS | MULTIPOLYGON (((-84.05331 32.52202, -84.00849 ... | 13269 | Taylor County, Georgia | 9.6 | 9.6 |
| 3232 | 01 | 003 | 00161527 | 0500000US01003 | 01003 | Baldwin | 06 | 4117605847 | 1133109409 | EastUS | POLYGON ((-88.02858 30.22676, -88.02399 30.230... | 01003 | Baldwin County, Alabama | 7.6 | 7.6 |
3233 rows × 15 columns
In [12]:
Copied!
# Show the counties on a map, coloured by their PM2.5 reading.
counties_w_pm25.explore(column="pm25_ug_per_m3")
# Show the counties on a map, coloured by their PM2.5 reading.
counties_w_pm25.explore(column="pm25_ug_per_m3")
Out[12]:
Make this Notebook Trusted to load map: File -> Trust Notebook